package uk.ac.ebi.pride.peptidome;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import uk.ac.ebi.pride.pubmed.PubMedUtils;

import java.io.IOException;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Downloads the summary data for a PubMed ID from PubMed and formats it.
 * <p/>
 * User: rwang
 * Date: 03/08/2011
 * Time: 16:42
 */
public class PubMedFetcher {

    private static final Logger logger = LoggerFactory.getLogger(PubMedFetcher.class);

    /**
     * Matches a raw summary of the form {@code 1:<citation>PMID:<rest>}, optionally
     * preceded by whitespace. Group 1 captures the citation text (untrimmed).
     * <p/>
     * Compiled once because {@link Pattern} is immutable and safe to share.
     * {@link Pattern#DOTALL} lets the citation span several lines; without it a
     * summary containing a line break would never match.
     */
    private static final Pattern SUMMARY_PATTERN =
            Pattern.compile("\\s*1:(.*)PMID:.*", Pattern.DOTALL);

    /**
     * Fetches the summary for the given PubMed ID and formats it.
     *
     * @param pubmedId the PubMed ID to look up
     * @return the formatted summary, or {@code null} when the fetched summary is
     *         {@code null} or blank, or when fetching fails with an {@link IOException}
     *         (the failure is logged, not rethrown)
     */
    public static String fetch(String pubmedId) {
        String pubmedSummary = null;

        try {
            String summaryRaw = PubMedUtils.getPubMedSummary(pubmedId);
            if (summaryRaw != null && !"".equals(summaryRaw.trim())) {
                pubmedSummary = formatRawSummary(summaryRaw);
            }
        } catch (IOException e) {
            // Best effort: callers receive null; include the id so the failure is traceable.
            logger.error("Failed to get pubmed summary for PubMed ID: " + pubmedId, e);
        }

        return pubmedSummary;
    }

    /**
     * Extracts the citation text from a raw summary by dropping the leading
     * {@code 1:} index and everything from the last {@code PMID:} onwards.
     *
     * @param rawSummary the raw summary text, must not be {@code null}
     * @return the citation text with surrounding whitespace removed, or
     *         {@code rawSummary} unchanged when it does not have the expected layout
     */
    private static String formatRawSummary(String rawSummary) {
        Matcher m = SUMMARY_PATTERN.matcher(rawSummary);
        if (m.matches()) {
            // The greedy group also captures whitespace around the citation; trim it here
            // rather than with extra \s* quantifiers, which only add backtracking.
            return m.group(1).trim();
        }

        return rawSummary;
    }

    /**
     * Manual smoke test: prints the formatted summary for PubMed ID 16038019.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        System.out.println(PubMedFetcher.fetch("16038019"));
    }
}
